{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 美国各州人口数据分析\n",
    "首先导入文件，并查看数据样本"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 加载数据\n",
    "- state-abbrevs.csv\n",
    "- state-areas.CsV\n",
    "- state-population.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "abb = pd.read_csv('./data/12_美国人口数据分析项目/state-abbrevs.csv')\n",
    "areas = pd.read_csv('./data/12_美国人口数据分析项目/state-areas.csv')\n",
    "pop = pd.read_csv('./data/12_美国人口数据分析项目/state-population.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alabama</td>\n",
       "      <td>52423</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Arizona</td>\n",
       "      <td>114006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Arkansas</td>\n",
       "      <td>53182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>California</td>\n",
       "      <td>163707</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        state  area (sq. mi)\n",
       "0     Alabama          52423\n",
       "1      Alaska         656425\n",
       "2     Arizona         114006\n",
       "3    Arkansas          53182\n",
       "4  California         163707"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(52, 2)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>abbreviation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alabama</td>\n",
       "      <td>AL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Alaska</td>\n",
       "      <td>AK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Arizona</td>\n",
       "      <td>AZ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Arkansas</td>\n",
       "      <td>AR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>California</td>\n",
       "      <td>CA</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        state abbreviation\n",
       "0     Alabama           AL\n",
       "1      Alaska           AK\n",
       "2     Arizona           AZ\n",
       "3    Arkansas           AR\n",
       "4  California           CA"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(51, 2)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AL</td>\n",
       "      <td>under18</td>\n",
       "      <td>2012</td>\n",
       "      <td>1117489.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AL</td>\n",
       "      <td>total</td>\n",
       "      <td>2012</td>\n",
       "      <td>4817528.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AL</td>\n",
       "      <td>under18</td>\n",
       "      <td>2010</td>\n",
       "      <td>1130966.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4785570.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AL</td>\n",
       "      <td>under18</td>\n",
       "      <td>2011</td>\n",
       "      <td>1125763.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  state/region     ages  year  population\n",
       "0           AL  under18  2012   1117489.0\n",
       "1           AL    total  2012   4817528.0\n",
       "2           AL  under18  2010   1130966.0\n",
       "3           AL    total  2010   4785570.0\n",
       "4           AL  under18  2011   1125763.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(2544, 4)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(areas.head(), areas.shape)\n",
    "display(abb.head(), abb.shape)\n",
    "display(pop.head(), pop.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "合并population与abbrevs两个DataFrame\n",
    "- 分别依据state/region列和abbreviation列来合并。\n",
    "- 为了保留所有信息，使用外合并。merge()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "      <th>abbreviation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>553290.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>AK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>177502.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>AK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>588736.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>AK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>182180.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>AK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1992</td>\n",
       "      <td>184878.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>AK</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  state/region     ages  year  population   state abbreviation\n",
       "0           AK    total  1990    553290.0  Alaska           AK\n",
       "1           AK  under18  1990    177502.0  Alaska           AK\n",
       "2           AK    total  1992    588736.0  Alaska           AK\n",
       "3           AK  under18  1991    182180.0  Alaska           AK\n",
       "4           AK  under18  1992    184878.0  Alaska           AK"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb = pop.merge(abb, how='outer', left_on='state/region', right_on='abbreviation')\n",
    "pop_abb.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "去除abbreviation的那一列(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>553290.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>177502.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>588736.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>182180.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1992</td>\n",
       "      <td>184878.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  state/region     ages  year  population   state\n",
       "0           AK    total  1990    553290.0  Alaska\n",
       "1           AK  under18  1990    177502.0  Alaska\n",
       "2           AK    total  1992    588736.0  Alaska\n",
       "3           AK  under18  1991    182180.0  Alaska\n",
       "4           AK  under18  1992    184878.0  Alaska"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb.drop(columns='abbreviation', inplace=True, axis=1)\n",
    "pop_abb.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看存在缺失数据的列。\n",
    "- 使用pop_abb.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state/region    False\n",
       "ages            False\n",
       "year            False\n",
       "population       True\n",
       "state            True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb.isnull().any()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看缺失数据\n",
    "- 根据数据是否缺失情况显示数据，如果缺失为True，那么显示"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1872</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1873</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1874</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1991</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1875</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1876</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1993</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2203</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>309326295.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2204</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2011</td>\n",
       "      <td>73902222.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2205</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2011</td>\n",
       "      <td>311582564.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2206</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2012</td>\n",
       "      <td>73708179.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2207</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2012</td>\n",
       "      <td>313873685.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>96 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     state/region     ages  year   population state\n",
       "1872           PR  under18  1990          NaN   NaN\n",
       "1873           PR    total  1990          NaN   NaN\n",
       "1874           PR    total  1991          NaN   NaN\n",
       "1875           PR  under18  1991          NaN   NaN\n",
       "1876           PR    total  1993          NaN   NaN\n",
       "...           ...      ...   ...          ...   ...\n",
       "2203          USA    total  2010  309326295.0   NaN\n",
       "2204          USA  under18  2011   73902222.0   NaN\n",
       "2205          USA    total  2011  311582564.0   NaN\n",
       "2206          USA  under18  2012   73708179.0   NaN\n",
       "2207          USA    total  2012  313873685.0   NaN\n",
       "\n",
       "[96 rows x 5 columns]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 存在空值的行\n",
    "cond = pop_abb.isnull().any(axis=1)\n",
    "cond\n",
    "\n",
    "# 查看缺失数据\n",
    "pop_abb.loc[cond]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "找到有哪些state/region使得state的值为NaN\n",
    "- 使用unique()查看非重复值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['PR', 'USA'], dtype=object)"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cond2 = pop_abb['state'].isnull()\n",
    "cond2\n",
    "\n",
    "pop_abb.loc[cond2]['state/region'].unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "填充state这一列的所有NaN\n",
    "- 找到的这些state/region的state项补上正确的值\n",
    "\n",
    "> PR => Puerto Rico    \n",
    "> USA => United State"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_30732\\1286741494.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  pop_abb['state'][cond] = 'Puerto Rico'\n"
     ]
    }
   ],
   "source": [
    "cond = pop_abb['state/region'] == 'PR'\n",
    "cond\n",
    "\n",
    "pop_abb['state'][cond] = 'Puerto Rico'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1872</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1873</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1874</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1991</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1875</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1876</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1993</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1877</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1993</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1878</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1992</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1879</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1880</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1994</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1881</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1994</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1882</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1883</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1884</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1996</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1885</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1996</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1886</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1887</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1888</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1889</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1890</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>1999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1891</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>1999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1892</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2000</td>\n",
       "      <td>3810605.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1893</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2000</td>\n",
       "      <td>1089063.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1894</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2001</td>\n",
       "      <td>3818774.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1895</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2001</td>\n",
       "      <td>1077566.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1896</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2002</td>\n",
       "      <td>3823701.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1897</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2002</td>\n",
       "      <td>1065051.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1898</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2004</td>\n",
       "      <td>3826878.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1899</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2004</td>\n",
       "      <td>1035919.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1900</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2003</td>\n",
       "      <td>3826095.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1901</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2003</td>\n",
       "      <td>1050615.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1902</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2005</td>\n",
       "      <td>3821362.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1903</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2005</td>\n",
       "      <td>1019447.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1904</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2006</td>\n",
       "      <td>3805214.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1905</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2006</td>\n",
       "      <td>998543.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1906</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2007</td>\n",
       "      <td>3782995.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1907</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2007</td>\n",
       "      <td>973613.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1908</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2008</td>\n",
       "      <td>3760866.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1909</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2008</td>\n",
       "      <td>945705.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1910</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2013</td>\n",
       "      <td>814068.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1911</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2013</td>\n",
       "      <td>3615086.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1912</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2009</td>\n",
       "      <td>3740410.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1913</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2009</td>\n",
       "      <td>920794.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1914</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3721208.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1915</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2010</td>\n",
       "      <td>896945.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1916</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2011</td>\n",
       "      <td>869327.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1917</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2011</td>\n",
       "      <td>3686580.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1918</th>\n",
       "      <td>PR</td>\n",
       "      <td>under18</td>\n",
       "      <td>2012</td>\n",
       "      <td>841740.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1919</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2012</td>\n",
       "      <td>3651545.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     state/region     ages  year  population        state\n",
       "1872           PR  under18  1990         NaN  Puerto Rico\n",
       "1873           PR    total  1990         NaN  Puerto Rico\n",
       "1874           PR    total  1991         NaN  Puerto Rico\n",
       "1875           PR  under18  1991         NaN  Puerto Rico\n",
       "1876           PR    total  1993         NaN  Puerto Rico\n",
       "1877           PR  under18  1993         NaN  Puerto Rico\n",
       "1878           PR  under18  1992         NaN  Puerto Rico\n",
       "1879           PR    total  1992         NaN  Puerto Rico\n",
       "1880           PR  under18  1994         NaN  Puerto Rico\n",
       "1881           PR    total  1994         NaN  Puerto Rico\n",
       "1882           PR    total  1995         NaN  Puerto Rico\n",
       "1883           PR  under18  1995         NaN  Puerto Rico\n",
       "1884           PR  under18  1996         NaN  Puerto Rico\n",
       "1885           PR    total  1996         NaN  Puerto Rico\n",
       "1886           PR  under18  1998         NaN  Puerto Rico\n",
       "1887           PR    total  1998         NaN  Puerto Rico\n",
       "1888           PR    total  1997         NaN  Puerto Rico\n",
       "1889           PR  under18  1997         NaN  Puerto Rico\n",
       "1890           PR    total  1999         NaN  Puerto Rico\n",
       "1891           PR  under18  1999         NaN  Puerto Rico\n",
       "1892           PR    total  2000   3810605.0  Puerto Rico\n",
       "1893           PR  under18  2000   1089063.0  Puerto Rico\n",
       "1894           PR    total  2001   3818774.0  Puerto Rico\n",
       "1895           PR  under18  2001   1077566.0  Puerto Rico\n",
       "1896           PR    total  2002   3823701.0  Puerto Rico\n",
       "1897           PR  under18  2002   1065051.0  Puerto Rico\n",
       "1898           PR    total  2004   3826878.0  Puerto Rico\n",
       "1899           PR  under18  2004   1035919.0  Puerto Rico\n",
       "1900           PR    total  2003   3826095.0  Puerto Rico\n",
       "1901           PR  under18  2003   1050615.0  Puerto Rico\n",
       "1902           PR    total  2005   3821362.0  Puerto Rico\n",
       "1903           PR  under18  2005   1019447.0  Puerto Rico\n",
       "1904           PR    total  2006   3805214.0  Puerto Rico\n",
       "1905           PR  under18  2006    998543.0  Puerto Rico\n",
       "1906           PR    total  2007   3782995.0  Puerto Rico\n",
       "1907           PR  under18  2007    973613.0  Puerto Rico\n",
       "1908           PR    total  2008   3760866.0  Puerto Rico\n",
       "1909           PR  under18  2008    945705.0  Puerto Rico\n",
       "1910           PR  under18  2013    814068.0  Puerto Rico\n",
       "1911           PR    total  2013   3615086.0  Puerto Rico\n",
       "1912           PR    total  2009   3740410.0  Puerto Rico\n",
       "1913           PR  under18  2009    920794.0  Puerto Rico\n",
       "1914           PR    total  2010   3721208.0  Puerto Rico\n",
       "1915           PR  under18  2010    896945.0  Puerto Rico\n",
       "1916           PR  under18  2011    869327.0  Puerto Rico\n",
       "1917           PR    total  2011   3686580.0  Puerto Rico\n",
       "1918           PR  under18  2012    841740.0  Puerto Rico\n",
       "1919           PR    total  2012   3651545.0  Puerto Rico"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb[cond]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_30732\\2183590898.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  pop_abb['state'][cond] = 'United State'\n"
     ]
    }
   ],
   "source": [
    "cond = pop_abb['state/region'] == 'USA'\n",
    "cond\n",
    "\n",
    "pop_abb['state'][cond] = 'United State'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2160</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>64218512.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>249622814.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2162</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1991</td>\n",
       "      <td>252980942.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2163</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>65313018.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2164</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1992</td>\n",
       "      <td>66509177.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2165</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>256514231.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2166</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1993</td>\n",
       "      <td>259918595.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2167</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1993</td>\n",
       "      <td>67594938.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2168</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1994</td>\n",
       "      <td>68640936.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1994</td>\n",
       "      <td>263125826.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1995</td>\n",
       "      <td>69473140.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2171</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1996</td>\n",
       "      <td>70233512.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2172</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1995</td>\n",
       "      <td>266278403.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1996</td>\n",
       "      <td>269394291.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2174</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1997</td>\n",
       "      <td>272646932.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2175</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1997</td>\n",
       "      <td>70920738.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1998</td>\n",
       "      <td>71431406.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1998</td>\n",
       "      <td>275854116.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2178</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>1999</td>\n",
       "      <td>71946051.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2179</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2000</td>\n",
       "      <td>282162411.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2180</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2000</td>\n",
       "      <td>72376189.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2181</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>1999</td>\n",
       "      <td>279040181.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2001</td>\n",
       "      <td>284968955.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2183</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2001</td>\n",
       "      <td>72671175.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2184</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2002</td>\n",
       "      <td>287625193.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2185</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2002</td>\n",
       "      <td>72936457.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2186</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2003</td>\n",
       "      <td>290107933.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2187</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2003</td>\n",
       "      <td>73100758.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2004</td>\n",
       "      <td>292805298.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2189</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2004</td>\n",
       "      <td>73297735.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2190</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2005</td>\n",
       "      <td>295516599.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2191</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2005</td>\n",
       "      <td>73523669.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2192</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2006</td>\n",
       "      <td>298379912.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2193</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2006</td>\n",
       "      <td>73757714.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2194</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2007</td>\n",
       "      <td>301231207.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2195</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2007</td>\n",
       "      <td>74019405.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2196</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2008</td>\n",
       "      <td>304093966.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2197</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2008</td>\n",
       "      <td>74104602.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2198</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2013</td>\n",
       "      <td>73585872.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2199</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2013</td>\n",
       "      <td>316128839.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2200</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2009</td>\n",
       "      <td>306771529.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2201</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2009</td>\n",
       "      <td>74134167.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2202</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2010</td>\n",
       "      <td>74119556.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2203</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>309326295.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2204</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2011</td>\n",
       "      <td>73902222.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2205</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2011</td>\n",
       "      <td>311582564.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2206</th>\n",
       "      <td>USA</td>\n",
       "      <td>under18</td>\n",
       "      <td>2012</td>\n",
       "      <td>73708179.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2207</th>\n",
       "      <td>USA</td>\n",
       "      <td>total</td>\n",
       "      <td>2012</td>\n",
       "      <td>313873685.0</td>\n",
       "      <td>United State</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     state/region     ages  year   population         state\n",
       "2160          USA  under18  1990   64218512.0  United State\n",
       "2161          USA    total  1990  249622814.0  United State\n",
       "2162          USA    total  1991  252980942.0  United State\n",
       "2163          USA  under18  1991   65313018.0  United State\n",
       "2164          USA  under18  1992   66509177.0  United State\n",
       "2165          USA    total  1992  256514231.0  United State\n",
       "2166          USA    total  1993  259918595.0  United State\n",
       "2167          USA  under18  1993   67594938.0  United State\n",
       "2168          USA  under18  1994   68640936.0  United State\n",
       "2169          USA    total  1994  263125826.0  United State\n",
       "2170          USA  under18  1995   69473140.0  United State\n",
       "2171          USA  under18  1996   70233512.0  United State\n",
       "2172          USA    total  1995  266278403.0  United State\n",
       "2173          USA    total  1996  269394291.0  United State\n",
       "2174          USA    total  1997  272646932.0  United State\n",
       "2175          USA  under18  1997   70920738.0  United State\n",
       "2176          USA  under18  1998   71431406.0  United State\n",
       "2177          USA    total  1998  275854116.0  United State\n",
       "2178          USA  under18  1999   71946051.0  United State\n",
       "2179          USA    total  2000  282162411.0  United State\n",
       "2180          USA  under18  2000   72376189.0  United State\n",
       "2181          USA    total  1999  279040181.0  United State\n",
       "2182          USA    total  2001  284968955.0  United State\n",
       "2183          USA  under18  2001   72671175.0  United State\n",
       "2184          USA    total  2002  287625193.0  United State\n",
       "2185          USA  under18  2002   72936457.0  United State\n",
       "2186          USA    total  2003  290107933.0  United State\n",
       "2187          USA  under18  2003   73100758.0  United State\n",
       "2188          USA    total  2004  292805298.0  United State\n",
       "2189          USA  under18  2004   73297735.0  United State\n",
       "2190          USA    total  2005  295516599.0  United State\n",
       "2191          USA  under18  2005   73523669.0  United State\n",
       "2192          USA    total  2006  298379912.0  United State\n",
       "2193          USA  under18  2006   73757714.0  United State\n",
       "2194          USA    total  2007  301231207.0  United State\n",
       "2195          USA  under18  2007   74019405.0  United State\n",
       "2196          USA    total  2008  304093966.0  United State\n",
       "2197          USA  under18  2008   74104602.0  United State\n",
       "2198          USA  under18  2013   73585872.0  United State\n",
       "2199          USA    total  2013  316128839.0  United State\n",
       "2200          USA    total  2009  306771529.0  United State\n",
       "2201          USA  under18  2009   74134167.0  United State\n",
       "2202          USA  under18  2010   74119556.0  United State\n",
       "2203          USA    total  2010  309326295.0  United State\n",
       "2204          USA  under18  2011   73902222.0  United State\n",
       "2205          USA    total  2011  311582564.0  United State\n",
       "2206          USA  under18  2012   73708179.0  United State\n",
       "2207          USA    total  2012  313873685.0  United State"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb[cond]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "合并各州面积数据areas，使用左合并。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Alabama</td>\n",
       "      <td>52423</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Arizona</td>\n",
       "      <td>114006</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     state  area (sq. mi)\n",
       "0  Alabama          52423\n",
       "1   Alaska         656425\n",
       "2  Arizona         114006"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>553290.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>177502.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>588736.0</td>\n",
       "      <td>Alaska</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  state/region     ages  year  population   state\n",
       "0           AK    total  1990    553290.0  Alaska\n",
       "1           AK  under18  1990    177502.0  Alaska\n",
       "2           AK    total  1992    588736.0  Alaska"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(areas.head(3), pop_abb.head(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>553290.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>177502.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>588736.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>182180.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1992</td>\n",
       "      <td>184878.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  state/region     ages  year  population   state  area (sq. mi)\n",
       "0           AK    total  1990    553290.0  Alaska       656425.0\n",
       "1           AK  under18  1990    177502.0  Alaska       656425.0\n",
       "2           AK    total  1992    588736.0  Alaska       656425.0\n",
       "3           AK  under18  1991    182180.0  Alaska       656425.0\n",
       "4           AK  under18  1992    184878.0  Alaska       656425.0"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb_areas = pop_abb.merge(areas, how='left')\n",
    "pop_abb_areas.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "继续寻找存在缺失数据的列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2544 entries, 0 to 2543\n",
      "Data columns (total 6 columns):\n",
      " #   Column         Non-Null Count  Dtype  \n",
      "---  ------         --------------  -----  \n",
      " 0   state/region   2544 non-null   object \n",
      " 1   ages           2544 non-null   object \n",
      " 2   year           2544 non-null   int64  \n",
      " 3   population     2524 non-null   float64\n",
      " 4   state          2544 non-null   object \n",
      " 5   area (sq. mi)  2496 non-null   float64\n",
      "dtypes: float64(2), int64(1), object(3)\n",
      "memory usage: 119.4+ KB\n"
     ]
    }
   ],
   "source": [
    "pop_abb_areas.isnull().any()\n",
    "\n",
    "pop_abb_areas.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "我们会发现area(sq.mi)这一列有缺失数昺，为了找出是哪一行，我们需要找出是哪个state没有数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['United State'], dtype=object)"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cond = pop_abb_areas['area (sq. mi)'].isnull()\n",
    "cond\n",
    "\n",
    "pop_abb_areas['state'][cond].unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "去除含有缺失数据的行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>553290.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>177502.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>1992</td>\n",
       "      <td>588736.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>182180.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AK</td>\n",
       "      <td>under18</td>\n",
       "      <td>1992</td>\n",
       "      <td>184878.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2539</th>\n",
       "      <td>WY</td>\n",
       "      <td>under18</td>\n",
       "      <td>1993</td>\n",
       "      <td>137458.0</td>\n",
       "      <td>Wyoming</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2540</th>\n",
       "      <td>WY</td>\n",
       "      <td>total</td>\n",
       "      <td>1991</td>\n",
       "      <td>459260.0</td>\n",
       "      <td>Wyoming</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2541</th>\n",
       "      <td>WY</td>\n",
       "      <td>under18</td>\n",
       "      <td>1991</td>\n",
       "      <td>136720.0</td>\n",
       "      <td>Wyoming</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2542</th>\n",
       "      <td>WY</td>\n",
       "      <td>under18</td>\n",
       "      <td>1990</td>\n",
       "      <td>136078.0</td>\n",
       "      <td>Wyoming</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2543</th>\n",
       "      <td>WY</td>\n",
       "      <td>total</td>\n",
       "      <td>1990</td>\n",
       "      <td>453690.0</td>\n",
       "      <td>Wyoming</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2476 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     state/region     ages  year  population    state  area (sq. mi)\n",
       "0              AK    total  1990    553290.0   Alaska       656425.0\n",
       "1              AK  under18  1990    177502.0   Alaska       656425.0\n",
       "2              AK    total  1992    588736.0   Alaska       656425.0\n",
       "3              AK  under18  1991    182180.0   Alaska       656425.0\n",
       "4              AK  under18  1992    184878.0   Alaska       656425.0\n",
       "...           ...      ...   ...         ...      ...            ...\n",
       "2539           WY  under18  1993    137458.0  Wyoming        97818.0\n",
       "2540           WY    total  1991    459260.0  Wyoming        97818.0\n",
       "2541           WY  under18  1991    136720.0  Wyoming        97818.0\n",
       "2542           WY  under18  1990    136078.0  Wyoming        97818.0\n",
       "2543           WY    total  1990    453690.0  Wyoming        97818.0\n",
       "\n",
       "[2476 rows x 6 columns]"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cond = pop_abb_areas.notnull().all(axis=1)\n",
    "cond\n",
    "\n",
    "pop_abb_areas2 = pop_abb_areas.loc[cond]\n",
    "pop_abb_areas2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "再查看数据是否缺失"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state/region     False\n",
       "ages             False\n",
       "year             False\n",
       "population       False\n",
       "state            False\n",
       "area (sq. mi)    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop_abb_areas2.isnull().any()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "找出2010年的全民人口数据\n",
    "- df.query(查询语句)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>state</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>713868.0</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>AL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4785570.0</td>\n",
       "      <td>Alabama</td>\n",
       "      <td>52423.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>AR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2922280.0</td>\n",
       "      <td>Arkansas</td>\n",
       "      <td>53182.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>AZ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6408790.0</td>\n",
       "      <td>Arizona</td>\n",
       "      <td>114006.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>CA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>37333601.0</td>\n",
       "      <td>California</td>\n",
       "      <td>163707.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>283</th>\n",
       "      <td>CO</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5048196.0</td>\n",
       "      <td>Colorado</td>\n",
       "      <td>104100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>CT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3579210.0</td>\n",
       "      <td>Connecticut</td>\n",
       "      <td>5544.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>DC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>605125.0</td>\n",
       "      <td>District of Columbia</td>\n",
       "      <td>68.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>427</th>\n",
       "      <td>DE</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>899711.0</td>\n",
       "      <td>Delaware</td>\n",
       "      <td>1954.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>475</th>\n",
       "      <td>FL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>18846054.0</td>\n",
       "      <td>Florida</td>\n",
       "      <td>65758.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>485</th>\n",
       "      <td>GA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>9713248.0</td>\n",
       "      <td>Georgia</td>\n",
       "      <td>59441.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>570</th>\n",
       "      <td>HI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1363731.0</td>\n",
       "      <td>Hawaii</td>\n",
       "      <td>10932.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>618</th>\n",
       "      <td>IA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3050314.0</td>\n",
       "      <td>Iowa</td>\n",
       "      <td>56276.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>629</th>\n",
       "      <td>ID</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1570718.0</td>\n",
       "      <td>Idaho</td>\n",
       "      <td>83574.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>714</th>\n",
       "      <td>IL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>12839695.0</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>57918.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>725</th>\n",
       "      <td>IN</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6489965.0</td>\n",
       "      <td>Indiana</td>\n",
       "      <td>36420.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>773</th>\n",
       "      <td>KS</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2858910.0</td>\n",
       "      <td>Kansas</td>\n",
       "      <td>82282.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>858</th>\n",
       "      <td>KY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4347698.0</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>40411.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>869</th>\n",
       "      <td>LA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4545392.0</td>\n",
       "      <td>Louisiana</td>\n",
       "      <td>51843.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>954</th>\n",
       "      <td>MA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6563263.0</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>10555.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>965</th>\n",
       "      <td>MD</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5787193.0</td>\n",
       "      <td>Maryland</td>\n",
       "      <td>12407.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1050</th>\n",
       "      <td>ME</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1327366.0</td>\n",
       "      <td>Maine</td>\n",
       "      <td>35387.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1061</th>\n",
       "      <td>MI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>9876149.0</td>\n",
       "      <td>Michigan</td>\n",
       "      <td>96810.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1146</th>\n",
       "      <td>MN</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5310337.0</td>\n",
       "      <td>Minnesota</td>\n",
       "      <td>86943.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1194</th>\n",
       "      <td>MO</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5996063.0</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>69709.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1205</th>\n",
       "      <td>MS</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2970047.0</td>\n",
       "      <td>Mississippi</td>\n",
       "      <td>48434.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1253</th>\n",
       "      <td>MT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>990527.0</td>\n",
       "      <td>Montana</td>\n",
       "      <td>147046.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1338</th>\n",
       "      <td>NC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>9559533.0</td>\n",
       "      <td>North Carolina</td>\n",
       "      <td>53821.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1349</th>\n",
       "      <td>ND</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>674344.0</td>\n",
       "      <td>North Dakota</td>\n",
       "      <td>70704.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1434</th>\n",
       "      <td>NE</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1829838.0</td>\n",
       "      <td>Nebraska</td>\n",
       "      <td>77358.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1482</th>\n",
       "      <td>NH</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1316614.0</td>\n",
       "      <td>New Hampshire</td>\n",
       "      <td>9351.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1493</th>\n",
       "      <td>NJ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>8802707.0</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>8722.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578</th>\n",
       "      <td>NM</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2064982.0</td>\n",
       "      <td>New Mexico</td>\n",
       "      <td>121593.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1589</th>\n",
       "      <td>NV</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2703230.0</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>110567.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1637</th>\n",
       "      <td>NY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>19398228.0</td>\n",
       "      <td>New York</td>\n",
       "      <td>54475.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1722</th>\n",
       "      <td>OH</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>11545435.0</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>44828.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>OK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3759263.0</td>\n",
       "      <td>Oklahoma</td>\n",
       "      <td>69903.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1818</th>\n",
       "      <td>OR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3837208.0</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>98386.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1829</th>\n",
       "      <td>PA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>12710472.0</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>46058.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1914</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3721208.0</td>\n",
       "      <td>Puerto Rico</td>\n",
       "      <td>3515.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1962</th>\n",
       "      <td>RI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1052669.0</td>\n",
       "      <td>Rhode Island</td>\n",
       "      <td>1545.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1973</th>\n",
       "      <td>SC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4636361.0</td>\n",
       "      <td>South Carolina</td>\n",
       "      <td>32007.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2058</th>\n",
       "      <td>SD</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>816211.0</td>\n",
       "      <td>South Dakota</td>\n",
       "      <td>77121.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2069</th>\n",
       "      <td>TN</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6356683.0</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>42146.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2154</th>\n",
       "      <td>TX</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>25245178.0</td>\n",
       "      <td>Texas</td>\n",
       "      <td>268601.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2213</th>\n",
       "      <td>UT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2774424.0</td>\n",
       "      <td>Utah</td>\n",
       "      <td>84904.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2261</th>\n",
       "      <td>VA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>8024417.0</td>\n",
       "      <td>Virginia</td>\n",
       "      <td>42769.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2346</th>\n",
       "      <td>VT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>625793.0</td>\n",
       "      <td>Vermont</td>\n",
       "      <td>9615.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2394</th>\n",
       "      <td>WA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6742256.0</td>\n",
       "      <td>Washington</td>\n",
       "      <td>71303.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2442</th>\n",
       "      <td>WI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5689060.0</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>65503.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2453</th>\n",
       "      <td>WV</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1854146.0</td>\n",
       "      <td>West Virginia</td>\n",
       "      <td>24231.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2501</th>\n",
       "      <td>WY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>564222.0</td>\n",
       "      <td>Wyoming</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     state/region   ages  year  population                 state  \\\n",
       "43             AK  total  2010    713868.0                Alaska   \n",
       "51             AL  total  2010   4785570.0               Alabama   \n",
       "141            AR  total  2010   2922280.0              Arkansas   \n",
       "149            AZ  total  2010   6408790.0               Arizona   \n",
       "197            CA  total  2010  37333601.0            California   \n",
       "283            CO  total  2010   5048196.0              Colorado   \n",
       "293            CT  total  2010   3579210.0           Connecticut   \n",
       "341            DC  total  2010    605125.0  District of Columbia   \n",
       "427            DE  total  2010    899711.0              Delaware   \n",
       "475            FL  total  2010  18846054.0               Florida   \n",
       "485            GA  total  2010   9713248.0               Georgia   \n",
       "570            HI  total  2010   1363731.0                Hawaii   \n",
       "618            IA  total  2010   3050314.0                  Iowa   \n",
       "629            ID  total  2010   1570718.0                 Idaho   \n",
       "714            IL  total  2010  12839695.0              Illinois   \n",
       "725            IN  total  2010   6489965.0               Indiana   \n",
       "773            KS  total  2010   2858910.0                Kansas   \n",
       "858            KY  total  2010   4347698.0              Kentucky   \n",
       "869            LA  total  2010   4545392.0             Louisiana   \n",
       "954            MA  total  2010   6563263.0         Massachusetts   \n",
       "965            MD  total  2010   5787193.0              Maryland   \n",
       "1050           ME  total  2010   1327366.0                 Maine   \n",
       "1061           MI  total  2010   9876149.0              Michigan   \n",
       "1146           MN  total  2010   5310337.0             Minnesota   \n",
       "1194           MO  total  2010   5996063.0              Missouri   \n",
       "1205           MS  total  2010   2970047.0           Mississippi   \n",
       "1253           MT  total  2010    990527.0               Montana   \n",
       "1338           NC  total  2010   9559533.0        North Carolina   \n",
       "1349           ND  total  2010    674344.0          North Dakota   \n",
       "1434           NE  total  2010   1829838.0              Nebraska   \n",
       "1482           NH  total  2010   1316614.0         New Hampshire   \n",
       "1493           NJ  total  2010   8802707.0            New Jersey   \n",
       "1578           NM  total  2010   2064982.0            New Mexico   \n",
       "1589           NV  total  2010   2703230.0                Nevada   \n",
       "1637           NY  total  2010  19398228.0              New York   \n",
       "1722           OH  total  2010  11545435.0                  Ohio   \n",
       "1733           OK  total  2010   3759263.0              Oklahoma   \n",
       "1818           OR  total  2010   3837208.0                Oregon   \n",
       "1829           PA  total  2010  12710472.0          Pennsylvania   \n",
       "1914           PR  total  2010   3721208.0           Puerto Rico   \n",
       "1962           RI  total  2010   1052669.0          Rhode Island   \n",
       "1973           SC  total  2010   4636361.0        South Carolina   \n",
       "2058           SD  total  2010    816211.0          South Dakota   \n",
       "2069           TN  total  2010   6356683.0             Tennessee   \n",
       "2154           TX  total  2010  25245178.0                 Texas   \n",
       "2213           UT  total  2010   2774424.0                  Utah   \n",
       "2261           VA  total  2010   8024417.0              Virginia   \n",
       "2346           VT  total  2010    625793.0               Vermont   \n",
       "2394           WA  total  2010   6742256.0            Washington   \n",
       "2442           WI  total  2010   5689060.0             Wisconsin   \n",
       "2453           WV  total  2010   1854146.0         West Virginia   \n",
       "2501           WY  total  2010    564222.0               Wyoming   \n",
       "\n",
       "      area (sq. mi)  \n",
       "43         656425.0  \n",
       "51          52423.0  \n",
       "141         53182.0  \n",
       "149        114006.0  \n",
       "197        163707.0  \n",
       "283        104100.0  \n",
       "293          5544.0  \n",
       "341            68.0  \n",
       "427          1954.0  \n",
       "475         65758.0  \n",
       "485         59441.0  \n",
       "570         10932.0  \n",
       "618         56276.0  \n",
       "629         83574.0  \n",
       "714         57918.0  \n",
       "725         36420.0  \n",
       "773         82282.0  \n",
       "858         40411.0  \n",
       "869         51843.0  \n",
       "954         10555.0  \n",
       "965         12407.0  \n",
       "1050        35387.0  \n",
       "1061        96810.0  \n",
       "1146        86943.0  \n",
       "1194        69709.0  \n",
       "1205        48434.0  \n",
       "1253       147046.0  \n",
       "1338        53821.0  \n",
       "1349        70704.0  \n",
       "1434        77358.0  \n",
       "1482         9351.0  \n",
       "1493         8722.0  \n",
       "1578       121593.0  \n",
       "1589       110567.0  \n",
       "1637        54475.0  \n",
       "1722        44828.0  \n",
       "1733        69903.0  \n",
       "1818        98386.0  \n",
       "1829        46058.0  \n",
       "1914         3515.0  \n",
       "1962         1545.0  \n",
       "1973        32007.0  \n",
       "2058        77121.0  \n",
       "2069        42146.0  \n",
       "2154       268601.0  \n",
       "2213        84904.0  \n",
       "2261        42769.0  \n",
       "2346         9615.0  \n",
       "2394        71303.0  \n",
       "2442        65503.0  \n",
       "2453        24231.0  \n",
       "2501        97818.0  "
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2010 = pop_abb_areas2.query('year == 2010 and ages == \"total\"')\n",
    "df_2010"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以state列作为新的行索引\n",
    "- 使用set index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alaska</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>713868.0</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Alabama</th>\n",
       "      <td>AL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4785570.0</td>\n",
       "      <td>52423.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Arkansas</th>\n",
       "      <td>AR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2922280.0</td>\n",
       "      <td>53182.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Arizona</th>\n",
       "      <td>AZ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6408790.0</td>\n",
       "      <td>114006.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>California</th>\n",
       "      <td>CA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>37333601.0</td>\n",
       "      <td>163707.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>CO</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5048196.0</td>\n",
       "      <td>104100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Connecticut</th>\n",
       "      <td>CT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3579210.0</td>\n",
       "      <td>5544.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>District of Columbia</th>\n",
       "      <td>DC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>605125.0</td>\n",
       "      <td>68.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Delaware</th>\n",
       "      <td>DE</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>899711.0</td>\n",
       "      <td>1954.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Florida</th>\n",
       "      <td>FL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>18846054.0</td>\n",
       "      <td>65758.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Georgia</th>\n",
       "      <td>GA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>9713248.0</td>\n",
       "      <td>59441.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hawaii</th>\n",
       "      <td>HI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1363731.0</td>\n",
       "      <td>10932.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iowa</th>\n",
       "      <td>IA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3050314.0</td>\n",
       "      <td>56276.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Idaho</th>\n",
       "      <td>ID</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1570718.0</td>\n",
       "      <td>83574.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Illinois</th>\n",
       "      <td>IL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>12839695.0</td>\n",
       "      <td>57918.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Indiana</th>\n",
       "      <td>IN</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6489965.0</td>\n",
       "      <td>36420.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kansas</th>\n",
       "      <td>KS</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2858910.0</td>\n",
       "      <td>82282.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kentucky</th>\n",
       "      <td>KY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4347698.0</td>\n",
       "      <td>40411.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Louisiana</th>\n",
       "      <td>LA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4545392.0</td>\n",
       "      <td>51843.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Massachusetts</th>\n",
       "      <td>MA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6563263.0</td>\n",
       "      <td>10555.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Maryland</th>\n",
       "      <td>MD</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5787193.0</td>\n",
       "      <td>12407.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Maine</th>\n",
       "      <td>ME</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1327366.0</td>\n",
       "      <td>35387.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Michigan</th>\n",
       "      <td>MI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>9876149.0</td>\n",
       "      <td>96810.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Minnesota</th>\n",
       "      <td>MN</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5310337.0</td>\n",
       "      <td>86943.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Missouri</th>\n",
       "      <td>MO</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5996063.0</td>\n",
       "      <td>69709.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mississippi</th>\n",
       "      <td>MS</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2970047.0</td>\n",
       "      <td>48434.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Montana</th>\n",
       "      <td>MT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>990527.0</td>\n",
       "      <td>147046.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North Carolina</th>\n",
       "      <td>NC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>9559533.0</td>\n",
       "      <td>53821.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North Dakota</th>\n",
       "      <td>ND</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>674344.0</td>\n",
       "      <td>70704.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nebraska</th>\n",
       "      <td>NE</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1829838.0</td>\n",
       "      <td>77358.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Hampshire</th>\n",
       "      <td>NH</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1316614.0</td>\n",
       "      <td>9351.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Jersey</th>\n",
       "      <td>NJ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>8802707.0</td>\n",
       "      <td>8722.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Mexico</th>\n",
       "      <td>NM</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2064982.0</td>\n",
       "      <td>121593.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nevada</th>\n",
       "      <td>NV</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2703230.0</td>\n",
       "      <td>110567.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>NY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>19398228.0</td>\n",
       "      <td>54475.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>OH</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>11545435.0</td>\n",
       "      <td>44828.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oklahoma</th>\n",
       "      <td>OK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3759263.0</td>\n",
       "      <td>69903.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>OR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3837208.0</td>\n",
       "      <td>98386.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pennsylvania</th>\n",
       "      <td>PA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>12710472.0</td>\n",
       "      <td>46058.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Puerto Rico</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3721208.0</td>\n",
       "      <td>3515.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rhode Island</th>\n",
       "      <td>RI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1052669.0</td>\n",
       "      <td>1545.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>South Carolina</th>\n",
       "      <td>SC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4636361.0</td>\n",
       "      <td>32007.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>South Dakota</th>\n",
       "      <td>SD</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>816211.0</td>\n",
       "      <td>77121.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tennessee</th>\n",
       "      <td>TN</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6356683.0</td>\n",
       "      <td>42146.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>TX</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>25245178.0</td>\n",
       "      <td>268601.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>UT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2774424.0</td>\n",
       "      <td>84904.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Virginia</th>\n",
       "      <td>VA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>8024417.0</td>\n",
       "      <td>42769.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Vermont</th>\n",
       "      <td>VT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>625793.0</td>\n",
       "      <td>9615.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Washington</th>\n",
       "      <td>WA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6742256.0</td>\n",
       "      <td>71303.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wisconsin</th>\n",
       "      <td>WI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>5689060.0</td>\n",
       "      <td>65503.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West Virginia</th>\n",
       "      <td>WV</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1854146.0</td>\n",
       "      <td>24231.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wyoming</th>\n",
       "      <td>WY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>564222.0</td>\n",
       "      <td>97818.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     state/region   ages  year  population  area (sq. mi)\n",
       "state                                                                    \n",
       "Alaska                         AK  total  2010    713868.0       656425.0\n",
       "Alabama                        AL  total  2010   4785570.0        52423.0\n",
       "Arkansas                       AR  total  2010   2922280.0        53182.0\n",
       "Arizona                        AZ  total  2010   6408790.0       114006.0\n",
       "California                     CA  total  2010  37333601.0       163707.0\n",
       "Colorado                       CO  total  2010   5048196.0       104100.0\n",
       "Connecticut                    CT  total  2010   3579210.0         5544.0\n",
       "District of Columbia           DC  total  2010    605125.0           68.0\n",
       "Delaware                       DE  total  2010    899711.0         1954.0\n",
       "Florida                        FL  total  2010  18846054.0        65758.0\n",
       "Georgia                        GA  total  2010   9713248.0        59441.0\n",
       "Hawaii                         HI  total  2010   1363731.0        10932.0\n",
       "Iowa                           IA  total  2010   3050314.0        56276.0\n",
       "Idaho                          ID  total  2010   1570718.0        83574.0\n",
       "Illinois                       IL  total  2010  12839695.0        57918.0\n",
       "Indiana                        IN  total  2010   6489965.0        36420.0\n",
       "Kansas                         KS  total  2010   2858910.0        82282.0\n",
       "Kentucky                       KY  total  2010   4347698.0        40411.0\n",
       "Louisiana                      LA  total  2010   4545392.0        51843.0\n",
       "Massachusetts                  MA  total  2010   6563263.0        10555.0\n",
       "Maryland                       MD  total  2010   5787193.0        12407.0\n",
       "Maine                          ME  total  2010   1327366.0        35387.0\n",
       "Michigan                       MI  total  2010   9876149.0        96810.0\n",
       "Minnesota                      MN  total  2010   5310337.0        86943.0\n",
       "Missouri                       MO  total  2010   5996063.0        69709.0\n",
       "Mississippi                    MS  total  2010   2970047.0        48434.0\n",
       "Montana                        MT  total  2010    990527.0       147046.0\n",
       "North Carolina                 NC  total  2010   9559533.0        53821.0\n",
       "North Dakota                   ND  total  2010    674344.0        70704.0\n",
       "Nebraska                       NE  total  2010   1829838.0        77358.0\n",
       "New Hampshire                  NH  total  2010   1316614.0         9351.0\n",
       "New Jersey                     NJ  total  2010   8802707.0         8722.0\n",
       "New Mexico                     NM  total  2010   2064982.0       121593.0\n",
       "Nevada                         NV  total  2010   2703230.0       110567.0\n",
       "New York                       NY  total  2010  19398228.0        54475.0\n",
       "Ohio                           OH  total  2010  11545435.0        44828.0\n",
       "Oklahoma                       OK  total  2010   3759263.0        69903.0\n",
       "Oregon                         OR  total  2010   3837208.0        98386.0\n",
       "Pennsylvania                   PA  total  2010  12710472.0        46058.0\n",
       "Puerto Rico                    PR  total  2010   3721208.0         3515.0\n",
       "Rhode Island                   RI  total  2010   1052669.0         1545.0\n",
       "South Carolina                 SC  total  2010   4636361.0        32007.0\n",
       "South Dakota                   SD  total  2010    816211.0        77121.0\n",
       "Tennessee                      TN  total  2010   6356683.0        42146.0\n",
       "Texas                          TX  total  2010  25245178.0       268601.0\n",
       "Utah                           UT  total  2010   2774424.0        84904.0\n",
       "Virginia                       VA  total  2010   8024417.0        42769.0\n",
       "Vermont                        VT  total  2010    625793.0         9615.0\n",
       "Washington                     WA  total  2010   6742256.0        71303.0\n",
       "Wisconsin                      WI  total  2010   5689060.0        65503.0\n",
       "West Virginia                  WV  total  2010   1854146.0        24231.0\n",
       "Wyoming                        WY  total  2010    564222.0        97818.0"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2010.set_index('state', inplace=True)\n",
    "df_2010"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "计算人口密度density"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alaska</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>713868.0</td>\n",
       "      <td>656425.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Alabama</th>\n",
       "      <td>AL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4785570.0</td>\n",
       "      <td>52423.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Arkansas</th>\n",
       "      <td>AR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2922280.0</td>\n",
       "      <td>53182.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Arizona</th>\n",
       "      <td>AZ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6408790.0</td>\n",
       "      <td>114006.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>California</th>\n",
       "      <td>CA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>37333601.0</td>\n",
       "      <td>163707.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           state/region   ages  year  population  area (sq. mi)\n",
       "state                                                          \n",
       "Alaska               AK  total  2010    713868.0       656425.0\n",
       "Alabama              AL  total  2010   4785570.0        52423.0\n",
       "Arkansas             AR  total  2010   2922280.0        53182.0\n",
       "Arizona              AZ  total  2010   6408790.0       114006.0\n",
       "California           CA  total  2010  37333601.0       163707.0"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2010.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_30732\\2689996360.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_2010['density'] = df_2010['population'] / df_2010['area (sq. mi)']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "      <th>density</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alaska</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>713868.0</td>\n",
       "      <td>656425.0</td>\n",
       "      <td>1.087509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Alabama</th>\n",
       "      <td>AL</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>4785570.0</td>\n",
       "      <td>52423.0</td>\n",
       "      <td>91.287603</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Arkansas</th>\n",
       "      <td>AR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>2922280.0</td>\n",
       "      <td>53182.0</td>\n",
       "      <td>54.948667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Arizona</th>\n",
       "      <td>AZ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>6408790.0</td>\n",
       "      <td>114006.0</td>\n",
       "      <td>56.214497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>California</th>\n",
       "      <td>CA</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>37333601.0</td>\n",
       "      <td>163707.0</td>\n",
       "      <td>228.051342</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           state/region   ages  year  population  area (sq. mi)     density\n",
       "state                                                                      \n",
       "Alaska               AK  total  2010    713868.0       656425.0    1.087509\n",
       "Alabama              AL  total  2010   4785570.0        52423.0   91.287603\n",
       "Arkansas             AR  total  2010   2922280.0        53182.0   54.948667\n",
       "Arizona              AZ  total  2010   6408790.0       114006.0   56.214497\n",
       "California           CA  total  2010  37333601.0       163707.0  228.051342"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 人口密度\n",
    "df_2010['density'] = df_2010['population'] / df_2010['area (sq. mi)']\n",
    "df_2010.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "排序，并找出人口密度最高的五个州sort values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "      <th>density</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>District of Columbia</th>\n",
       "      <td>DC</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>605125.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>8898.897059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Puerto Rico</th>\n",
       "      <td>PR</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3721208.0</td>\n",
       "      <td>3515.0</td>\n",
       "      <td>1058.665149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Jersey</th>\n",
       "      <td>NJ</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>8802707.0</td>\n",
       "      <td>8722.0</td>\n",
       "      <td>1009.253268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rhode Island</th>\n",
       "      <td>RI</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>1052669.0</td>\n",
       "      <td>1545.0</td>\n",
       "      <td>681.339159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Connecticut</th>\n",
       "      <td>CT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>3579210.0</td>\n",
       "      <td>5544.0</td>\n",
       "      <td>645.600649</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     state/region   ages  year  population  area (sq. mi)  \\\n",
       "state                                                                       \n",
       "District of Columbia           DC  total  2010    605125.0           68.0   \n",
       "Puerto Rico                    PR  total  2010   3721208.0         3515.0   \n",
       "New Jersey                     NJ  total  2010   8802707.0         8722.0   \n",
       "Rhode Island                   RI  total  2010   1052669.0         1545.0   \n",
       "Connecticut                    CT  total  2010   3579210.0         5544.0   \n",
       "\n",
       "                          density  \n",
       "state                              \n",
       "District of Columbia  8898.897059  \n",
       "Puerto Rico           1058.665149  \n",
       "New Jersey            1009.253268  \n",
       "Rhode Island           681.339159  \n",
       "Connecticut            645.600649  "
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2010.sort_values(by='density', ascending=False).head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "排序，找出人口密度最低的五个州"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state/region</th>\n",
       "      <th>ages</th>\n",
       "      <th>year</th>\n",
       "      <th>population</th>\n",
       "      <th>area (sq. mi)</th>\n",
       "      <th>density</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>state</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alaska</th>\n",
       "      <td>AK</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>713868.0</td>\n",
       "      <td>656425.0</td>\n",
       "      <td>1.087509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wyoming</th>\n",
       "      <td>WY</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>564222.0</td>\n",
       "      <td>97818.0</td>\n",
       "      <td>5.768079</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Montana</th>\n",
       "      <td>MT</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>990527.0</td>\n",
       "      <td>147046.0</td>\n",
       "      <td>6.736171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North Dakota</th>\n",
       "      <td>ND</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>674344.0</td>\n",
       "      <td>70704.0</td>\n",
       "      <td>9.537565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>South Dakota</th>\n",
       "      <td>SD</td>\n",
       "      <td>total</td>\n",
       "      <td>2010</td>\n",
       "      <td>816211.0</td>\n",
       "      <td>77121.0</td>\n",
       "      <td>10.583512</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             state/region   ages  year  population  area (sq. mi)    density\n",
       "state                                                                       \n",
       "Alaska                 AK  total  2010    713868.0       656425.0   1.087509\n",
       "Wyoming                WY  total  2010    564222.0        97818.0   5.768079\n",
       "Montana                MT  total  2010    990527.0       147046.0   6.736171\n",
       "North Dakota           ND  total  2010    674344.0        70704.0   9.537565\n",
       "South Dakota           SD  total  2010    816211.0        77121.0  10.583512"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2010.sort_values(by='density').head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
